import csv
import random, time
import pandas as pd


def loadDataset(filename, split, trainingSet=None, testSet=None):
    """Randomly split the data rows of a CSV file into training and test sets.

    The first row of the file is treated as a header and skipped. Every
    remaining non-empty row (a list of strings as produced by ``csv.reader``)
    is appended to ``trainingSet`` when ``random.random() < split`` and to
    ``testSet`` otherwise.

    Args:
        filename: Path of the CSV file to read.
        split: Threshold compared against ``random.random()``; roughly the
            fraction of rows that end up in the training set.
        trainingSet: Optional list that is extended in place with the
            training rows. A fresh list is created when omitted.
        testSet: Optional list that is extended in place with the test
            rows. A fresh list is created when omitted.

    Returns:
        The tuple ``(trainingSet, testSet)``; these are the same list objects
        that were passed in, when the caller supplied them.
    """
    # Use None sentinels: list defaults would be shared between calls and
    # silently accumulate rows from earlier invocations.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(filename, 'r', newline='') as f:
        reader = csv.reader(f)
        # Skip the header row; the default keeps an empty file from raising.
        next(reader, None)
        for row in reader:
            # csv.reader yields [] for blank lines; they carry no data.
            if not row:
                continue
            # Randomly assign the row to one of the two sets.
            if random.random() < split:
                trainingSet.append(row)
            else:
                testSet.append(row)
    return trainingSet, testSet


if __name__ == "__main__":
    # Script entry point: randomly split ./data/datasets.csv into a training
    # file and a test file under ./data/datasets/, then report the row counts
    # and the elapsed time.
    import os

    train = []
    test = []
    print('正在进行数据集随机划分...')
    start = time.time()
    # loadDataset fills the two lists in place (threshold 0.8 for training).
    loadDataset('./data/datasets.csv', 0.8, train, test)
    random.shuffle(train)
    random.shuffle(test)
    columns = ['id', 'gender', 'age', 'hypertension', 'heart_disease', 'ever_married', 'work_type', 'Residence_type',
               'avg_glucose_level', 'bmi', 'smoking_status', 'stroke']
    # to_csv does not create missing directories, so make sure the output
    # folder exists before writing into it.
    os.makedirs('./data/datasets', exist_ok=True)
    train_data = pd.DataFrame(data=train, columns=columns)
    train_data.to_csv('./data/datasets/train.csv', index=False)
    test_data = pd.DataFrame(data=test, columns=columns)
    # Add an empty 'predict' column to the test file.
    test_data['predict'] = None
    test_data.to_csv('./data/datasets/test.csv', index=False)
    end = time.time()
    print('数据集划分完毕...\n-------------------')
    print('训练数据集数量: {}\n测试数据集数量: {}\n-------------------'.format(len(train), len(test)))
    # The message previously claimed "data visualization" had finished; this
    # script only splits the dataset, so report that instead.
    print("数据集划分完成,用时 {} s".format(round(end - start, 3)))
